We study data on Mongolia and Japan.
# Embed the linked web page in the rendered document.
knitr::include_url("https://icu-hsuzuki.github.io/science/index-j.html")
# Set the LANG environment variable to "en" for this session
# (presumably so that R messages are shown in English -- confirm).
Sys.setenv(LANG = "en")
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(WDI)
# Fetch the CSV published at url_edu1 and read it into df_edu1.
url_edu1 <- "https://www.gender.go.jp/about_danjo/whitepaper/r03/zentai/html/honpen/csv/zuhyo01-05-01.csv"

# download.file() does not create missing directories, so make sure the
# target folder exists; this is a no-op when data/ is already there.
dir.create("data", showWarnings = FALSE, recursive = TRUE)

# mode = "wb" writes the bytes exactly as served, so the Shift-JIS content is
# not altered by text-mode translation on Windows.
download.file(url_edu1, destfile = "data/edu1.csv", mode = "wb")

# Show readr's encoding guesses for the downloaded file.
guess_encoding("data/edu1.csv")

# Decode as Shift-JIS and skip the first two lines so that the third line is
# used as the header row (presumably the skipped lines are titles -- confirm
# against the raw file).
df_edu1 <- read_csv(
  "data/edu1.csv",
  locale = locale(encoding = "Shift-JIS"),
  skip = 2
)
## Rows: 71 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): 年度
## dbl (9): 高等学校等(男子), 高等学校等(女子), 専修学校(専門課程,男子), 専修学校(専門課程,女子), 大学(学部,男子), 大学(学...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the tibble to inspect the parsed data.
df_edu1
# List the column names as read from the CSV header row.
colnames(df_edu1)
## [1] "年度" "高等学校等(男子)"
## [3] "高等学校等(女子)" "専修学校(専門課程,男子)"
## [5] "専修学校(専門課程,女子)" "大学(学部,男子)"
## [7] "大学(学部,女子)" "短期大学(本科,女子)"
## [9] "大学院(男子)" "大学院(女子)"
# Show the raw year column: it is character, and its labels restart at era
# boundaries, so it cannot be used directly as a numeric x axis.
df_edu1$年度
## [1] "昭和25" "26" "27" "28" "29" "30" "31" "32"
## [9] "33" "34" "35" "36" "37" "38" "39" "40"
## [17] "41" "42" "43" "44" "45" "46" "47" "48"
## [25] "49" "50" "51" "52" "53" "54" "55" "56"
## [33] "57" "58" "59" "60" "61" "62" "63" "平成元"
## [41] "2" "3" "4" "5" "6" "7" "8" "9"
## [49] "10" "11" "12" "13" "14" "15" "16" "17"
## [57] "18" "19" "20" "21" "22" "23" "24" "25"
## [65] "26" "27" "28" "29" "30" "令和元" "2"
# Copy of the raw table with short English column names, given in the
# original column order (suffix _m = male column, _f = female column).
df_edu0 <- setNames(
  df_edu1,
  c(
    "year",
    "highschool_m", "highschool_f",
    "vocational_m", "vocational_f",
    "university_m", "university_f",
    "juniorcol_f",
    "gradschool_m", "gradschool_f"
  )
)

# Replace the era-year labels with calendar years (one value per row, 71 rows)
# and add one combined column per school type.
# NOTE(review): each combined value is the plain average of the male and female
# columns, not a population-weighted rate -- confirm this is intended.
# juniorcol simply copies the female column, the only junior-college column.
df_edu00 <- df_edu0 %>%
  mutate(
    year = 1950:2020,
    highschool = (highschool_m + highschool_f) / 2,
    vocational = (vocational_m + vocational_f) / 2,
    university = (university_m + university_f) / 2,
    juniorcol = juniorcol_f,
    gradschool = (gradschool_m + gradschool_f) / 2
  )
# Line chart of the combined columns, one line per school type.
# Keep only year and the five combined columns (this drops the nine
# gender-specific columns), then reshape to one row per year and school type.
df_edu00 %>%
  select(year, highschool:gradschool) %>%
  pivot_longer(
    cols = !year,
    names_to = "schools",
    values_to = "percentage"
  ) %>%
  ggplot(aes(x = year, y = percentage, colour = schools)) +
  geom_line()
## Warning: Removed 49 rows containing missing values (`geom_line()`).
# Same derivation as the English-named table, keeping Japanese column names:
# add a calendar-year column and one combined column per school type, then
# keep only those six new columns (the ten original columns are dropped).
df_edu11 <- df_edu1 %>%
  mutate(
    年 = 1950:2020,
    高等学校 = (`高等学校等(男子)` + `高等学校等(女子)`) / 2,
    専修学校 = (`専修学校(専門課程,男子)` + `専修学校(専門課程,女子)`) / 2,
    大学 = (`大学(学部,男子)` + `大学(学部,女子)`) / 2,
    短期大学 = `短期大学(本科,女子)`,
    大学院 = (`大学院(男子)` + `大学院(女子)`) / 2
  ) %>%
  select(年:大学院)

# Print the result for inspection.
df_edu11
# Line chart with Japanese labels, drawn with the default theme font.
# Every column except the year column is stacked into school / rate pairs.
df_edu11 %>%
  pivot_longer(
    cols = !年,
    names_to = "学校",
    values_to = "進学率"
  ) %>%
  ggplot(aes(x = 年, y = 進学率, colour = 学校)) +
  geom_line()
# Same chart as the default-font version, but with a theme font family that
# contains Japanese glyphs. "HiraKakuPro-W3" is an alternative family name.
# NOTE(review): Hiragino fonts are typically only installed on macOS -- use
# another installed Japanese font family on other systems.
df_edu11 %>%
  pivot_longer(
    cols = !年,
    names_to = "学校",
    values_to = "進学率"
  ) %>%
  ggplot(aes(x = 年, y = 進学率, colour = 学校)) +
  geom_line() +
  theme_gray(base_family = "HiraginoSans-W3")
## Warning: Removed 49 rows containing missing values (`geom_line()`).
# Plot all nine original gender-specific series without averaging.
# A calendar-year column is appended, then every column other than the two
# year columns (the original label column and the new one) is stacked.
# "HiraginoSans-W3" is an alternative font family name.
df_edu1 %>%
  mutate(year = 1950:2020) %>%
  pivot_longer(
    cols = !c(年度, year),
    names_to = "学校",
    values_to = "進学率"
  ) %>%
  ggplot(aes(x = year, y = 進学率, colour = 学校)) +
  geom_line() +
  theme_gray(base_family = "HiraKakuPro-W3")
## Warning: Removed 94 rows containing missing values (`geom_line()`).
#{r dev='ragg_png'} df_edu1 %>% mutate(year = 1950:2020) %>% pivot_longer(2:10, names_to = "学校", values_to = "進学率") %>% ggplot(aes(x = year, y = 進学率, color = 学校)) + geom_line()